library(fpp2)
library(fpp3)
library(tsibble)
#library(forecast)

1. Loading the qcement data set from the fpp2 package

# Load the qcement dataset (quarterly Australian Portland cement production,
# shipped with the fpp2 package)
data(qcement)
?qcement  # open the help page describing the data set

# Convert the ts object to a tsibble so it works with the fpp3/fable tools.
# NOTE(review): for a ts input the quarterly index is derived automatically;
# the `index = Quarter` argument appears to be ignored (the index column is
# printed as `index`, not `Quarter`) -- confirm against the tsibble docs.
qcement_ts <- as_tsibble(qcement, index = Quarter)

# Print the first few rows of the tsibble object
head(qcement_ts)
## # A tsibble: 6 x 2 [1Q]
##     index value
##     <qtr> <dbl>
## 1 1956 Q1 0.465
## 2 1956 Q2 0.532
## 3 1956 Q3 0.561
## 4 1956 Q4 0.57 
## 5 1957 Q1 0.529
## 6 1957 Q2 0.604
detach("package:fpp2", unload = TRUE) # detach fpp2 so it does not mask fpp3 functions

The data set is a quarterly time series representing the total quarterly production of Portland cement in Australia (in millions of tonnes) from 1956:Q1 to 2014:Q1 (233 observations).

2. Plot the selected time series and comment on the general features you can observe

# Histogram of the distribution of quarterly cement production
gghistogram(qcement_ts$value, 
            add.normal = TRUE,  # overlay a fitted normal density
            add.kde = TRUE) +   # overlay a kernel density estimate
  xlab("Cement production (millions of tonnes)") + # fixed: data are production, not sales
  ylab("Frequency") +          # change y-axis label
  labs(title = "The distribution of production")

# Plot the cement production time series.
# Pass the measured variable explicitly so autoplot() does not have to
# guess which column to draw (avoids the "Plot variable not specified" message).
qcement_ts |>
  autoplot(value) + 
  labs(title = "Quarterly Cement production",
       subtitle = "Australian Portland",
       y = "in millions of tonnes",   # fixed typo: "millios"
       x = "Time")

# Seasonal subseries plot: one mini time plot per quarter, useful for
# spotting changes in the seasonal pattern over the years
qcement_ts |>
  gg_subseries(value) +
  labs(
    y = "in millions of tonnes",  # fixed typo: "millios"
    x = "Time",
    title = "Quarterly Cement production"
  )

The histogram suggests that the production values are approximately normally distributed.

The plot of quarterly cement production shows :

3. Apply both the decomposition methods seen in class and plot the results. Comment the results of these two procedures.

# Classical additive decomposition: value = trend + seasonal + random
decomp_add <- qcement_ts |>
  model(classical_decomposition(value, type = "additive"))

# Table of the estimated components (a "dable")
additive <- components(decomp_add)
additive
## # A dable: 233 x 7 [1Q]
## # Key:     .model [1]
## # :        value = trend + seasonal + random
##    .model                     index value  trend seasonal   random season_adjust
##    <chr>                      <qtr> <dbl>  <dbl>    <dbl>    <dbl>         <dbl>
##  1 "classical_decompositio… 1956 Q1 0.465 NA      -0.144  NA               0.609
##  2 "classical_decompositio… 1956 Q2 0.532 NA       0.0228 NA               0.509
##  3 "classical_decompositio… 1956 Q3 0.561  0.54    0.0753 -0.0543          0.486
##  4 "classical_decompositio… 1956 Q4 0.57   0.557   0.0462 -0.0332          0.524
##  5 "classical_decompositio… 1957 Q1 0.529  0.571  -0.144   0.102           0.673
##  6 "classical_decompositio… 1957 Q2 0.604  0.578   0.0228  0.00324         0.581
##  7 "classical_decompositio… 1957 Q3 0.603  0.583   0.0753 -0.0549          0.528
##  8 "classical_decompositio… 1957 Q4 0.582  0.588   0.0462 -0.0520          0.536
##  9 "classical_decompositio… 1958 Q1 0.554  0.595  -0.144   0.103           0.698
## 10 "classical_decompositio… 1958 Q2 0.62   0.607   0.0228 -0.0101          0.597
## # ℹ 223 more rows
additive |>        # plot the panels of the additive decomposition
  autoplot() 

# Classical multiplicative decomposition: value = trend * seasonal * random
decomp_mult <- qcement_ts |>
  model(classical_decomposition(value, type = "multiplicative"))

# Table of the estimated components
multiplicative <- components(decomp_mult)
multiplicative
## # A dable: 233 x 7 [1Q]
## # Key:     .model [1]
## # :        value = trend * seasonal * random
##    .model                       index value  trend seasonal random season_adjust
##    <chr>                        <qtr> <dbl>  <dbl>    <dbl>  <dbl>         <dbl>
##  1 "classical_decomposition(… 1956 Q1 0.465 NA        0.905 NA             0.514
##  2 "classical_decomposition(… 1956 Q2 0.532 NA        1.02  NA             0.524
##  3 "classical_decomposition(… 1956 Q3 0.561  0.54     1.05   0.990         0.535
##  4 "classical_decomposition(… 1956 Q4 0.57   0.557    1.03   0.994         0.553
##  5 "classical_decomposition(… 1957 Q1 0.529  0.571    0.905  1.02          0.585
##  6 "classical_decomposition(… 1957 Q2 0.604  0.578    1.02   1.03          0.594
##  7 "classical_decomposition(… 1957 Q3 0.603  0.583    1.05   0.987         0.575
##  8 "classical_decomposition(… 1957 Q4 0.582  0.588    1.03   0.962         0.565
##  9 "classical_decomposition(… 1958 Q1 0.554  0.595    0.905  1.03          0.612
## 10 "classical_decomposition(… 1958 Q2 0.62   0.607    1.02   1.00          0.610
## # ℹ 223 more rows
multiplicative |>           # plot the panels of the multiplicative decomposition
  autoplot()

Both classical decompositions leave the first two and last two values of the trend (and hence of the random component) missing. This is because the trend is estimated with a "2×4-MA" — a centred moving average of order 4 (appropriate for quarterly data), smoothed by a moving average of order 2 — which needs two observations on each side of every point and therefore cannot be computed at either end of the series.

Despite the missing values, we can observe that the trend is similar in both decomposition methods, showing several drops over time. However, there are differences in the random component between the two methods. In the additive decomposition, the random component exhibits some structure, while in the multiplicative decomposition, it appears to be more random .

# STL decomposition: trend and seasonal windows of 5, robust fitting
# so outliers do not distort the trend and seasonal estimates
decomp_stl <- qcement_ts |>
  model(
    STL(value ~ trend(window = 5) + season(window = 5), robust = TRUE)
  )

The trend window and season window are parameters that determine the rate of change of the trend-cycle and seasonal components. Smaller values for these parameters allow for more rapid changes in the trend-cycle and seasonal components. It is important to note that both the trend and season window should be odd numbers. The trend window refers to the number of consecutive observations used to estimate the trend-cycle, while the season window refers to the number of consecutive years used to estimate each value in the seasonal component.

stl <- components(decomp_stl)       # STL decomposition table (a dable)
stl
## # A dable: 233 x 7 [1Q]
## # Key:     .model [1]
## # :        value = trend + season_year + remainder
##    .model                  index value trend season_year remainder season_adjust
##    <chr>                   <qtr> <dbl> <dbl>       <dbl>     <dbl>         <dbl>
##  1 STL(value ~ trend(wi… 1956 Q1 0.465 0.513    -0.0483    0               0.513
##  2 STL(value ~ trend(wi… 1956 Q2 0.532 0.529     0.00503  -0.00179         0.527
##  3 STL(value ~ trend(wi… 1956 Q3 0.561 0.560     0.0281   -0.0267          0.533
##  4 STL(value ~ trend(wi… 1956 Q4 0.57  0.560     0.0104    0               0.560
##  5 STL(value ~ trend(wi… 1957 Q1 0.529 0.560    -0.0532    0.0226          0.582
##  6 STL(value ~ trend(wi… 1957 Q2 0.604 0.577     0.0246    0.00233         0.579
##  7 STL(value ~ trend(wi… 1957 Q3 0.603 0.577     0.0260    0               0.577
##  8 STL(value ~ trend(wi… 1957 Q4 0.582 0.592     0.0102   -0.0206          0.572
##  9 STL(value ~ trend(wi… 1958 Q1 0.554 0.608    -0.0539    0               0.608
## 10 STL(value ~ trend(wi… 1958 Q2 0.62  0.613     0.00820  -0.00110         0.612
## # ℹ 223 more rows
stl |>   # plot the STL components (trend, season_year, remainder)
  autoplot()

There is a difference in the seasonality behavior between classical and STL decomposition methods.

# STL on log-transformed data.
# The log transform stabilises the variance before decomposing.
qcement_ts <- qcement_ts |> 
  mutate(value_log = log(value))  # add the log-transformed series as a new column

qcement_ts |> 
  autoplot(value_log) + 
  labs(title = "Log Quarterly Cement production",
       subtitle = "Australian Portland",
       y = "Log",
       x = "Time")

qcement_ts |> 
  model(
    STL(value_log ~ trend(window = 5) + season(window = 5))
  ) -> stl_log

# NOTE(review): the name suggests a multiplicative decomposition, but this
# table holds the STL components of the LOG series; the name is kept so any
# later code referencing it still works.
stl_comp_mult <- components(stl_log)

# Reuse the stored table instead of recomputing components(stl_log)
stl_comp_mult |> 
  autoplot()

When analyzing time series data with both trend and seasonality, it is often useful to apply STL on log transformed data. This is because the log transformation can help to stabilize the variance of the data, making it easier to identify and isolate the trend and seasonal components as we see that the remainder is more random comparing to STL.

# Trend from the additive decomposition
# (na.omit drops the first and last two quarters, where the 2x4-MA trend is NA)
components(decomp_add) |> 
  na.omit() |> 
  select(index, trend) -> addit_trend_coef

# Trend from the multiplicative decomposition
components(decomp_mult) |> 
  na.omit() |> 
  select(index, trend) -> mult_trend_coef

# Trend from the STL decomposition (STL produces no NAs, so all 233 rows remain)
components(decomp_stl) |> 
  na.omit() |> 
  select(index, trend) -> stl_trend_coef

# Trend from the STL decomposition on the log-transformed data
components(stl_log) |> 
  na.omit() |> 
  select(index, trend) -> stl_log_trend_coef
# NOTE(review): row_number() ranks the rows; nrow(stl_log_trend_coef) was
# probably intended here to report the row count (233) -- confirm.
row_number(stl_log_trend_coef)
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
# Combine the four trend estimates side by side.
# na.omit() on the classical decompositions dropped the first two and last two
# quarters, so the surviving rows correspond to rows 3..231 of the full index.
# The STL tables still contain all 233 rows; slice them with 3:231 -- not
# 5:233, which would shift the STL trends two quarters out of alignment.
bind_cols(addit_trend_coef[, 2], 
          mult_trend_coef[, 2], 
          stl_trend_coef[3:231, 2], 
          stl_log_trend_coef[3:231, 2])
## # A tibble: 229 × 4
##    trend...1 trend...2 trend...3 trend...4
##        <dbl>     <dbl>     <dbl>     <dbl>
##  1     0.54      0.54      0.560    -0.552
##  2     0.557     0.557     0.577    -0.541
##  3     0.571     0.571     0.577    -0.546
##  4     0.578     0.578     0.592    -0.540
##  5     0.583     0.583     0.608    -0.520
##  6     0.588     0.588     0.613    -0.499
##  7     0.595     0.595     0.618    -0.484
##  8     0.607     0.607     0.624    -0.470
##  9     0.617     0.617     0.631    -0.454
## 10     0.626     0.626     0.646    -0.435
## # ℹ 219 more rows
# Plot the data together with the extracted trends
qcement_ts |> 
  autoplot(value, color = "grey") + 
  autolayer(components(decomp_mult), trend, color = "green") +
  autolayer(components(decomp_stl), trend, color = "red") +
  labs(x = "Time", y = "in millions of tonnes",  # fixed typo: "millios"
       title = "Extracted Trends",
       subtitle = "Multiplicative and STL decomposition")

qcement_ts |> 
  autoplot(value, color = "grey") + 
  autolayer(components(decomp_add), trend, color = "blue") +
  autolayer(components(decomp_stl), trend, color = "red") +
  labs(x = "Time", y = "in millions of tonnes",
       title = "Extracted Trends",
       subtitle = "Additive and STL decomposition")

qcement_ts |> 
  autoplot(value_log, color = "grey") + 
  autolayer(components(stl_log), trend, color = "black") +
  labs(x = "Time", y = "log(millions of tonnes)",  # fixed: this panel shows the log series
       title = "Extracted Trend, STL on log transformed data")

The trend component in both additive and multiplicative decomposition methods is similar in this scenario, as both methods utilize the moving average approach. However, both methods have a shortage of data at the tails compared to the STL method.

The trend component in STL decomposition is different because it is estimated using local regression (loess). This technique estimates the trend without moving averages (so it has no missing values at the ends) and can capture more subtle changes in the trend over time.

When using log-transformed data in the STL method, the resulting trend component can capture the underlying patterns in the time series data more effectively. This is because the log transformation can help to stabilize the variance of the data, making it easier to identify and isolate the trend and seasonal components.

# Extract the seasonal component from each decomposition

# Additive decomposition
components(decomp_add) |> 
  na.omit() |> 
  select(index, seasonal) -> addit_season_coef

# Multiplicative decomposition
components(decomp_mult) |> 
  na.omit() |> 
  select(index, seasonal) -> mult_season_coef

# STL decomposition
components(decomp_stl) |> 
  na.omit() |> 
  select(index, season_year) -> stl_season_coef

# STL decomposition on the log-transformed data
components(stl_log) |> 
  na.omit() |> 
  select(index, season_year) -> stl_log_season_coef

# Combine the seasonal estimates. The classical tables start at 1956 Q3
# (rows 3..231 of the full index), so slice the complete STL tables with
# 3:231 -- not 5:233, which would misalign them by two quarters.
season <- bind_cols(addit_season_coef, 
                    mult_season_coef[, 2],
                    stl_season_coef[3:231, 2],
                    stl_log_season_coef[3:231, 2])

season
## # A tsibble: 229 x 5 [1Q]
##      index seasonal...2 seasonal...3 season_year...4 season_year...5
##      <qtr>        <dbl>        <dbl>           <dbl>           <dbl>
##  1 1956 Q3       0.0753        1.05         -0.0532          -0.0867
##  2 1956 Q4       0.0462        1.03          0.0246           0.0253
##  3 1957 Q1      -0.144         0.905         0.0260           0.0449
##  4 1957 Q2       0.0228        1.02          0.0102           0.0142
##  5 1957 Q3       0.0753        1.05         -0.0539          -0.0848
##  6 1957 Q4       0.0462        1.03          0.00820          0.0289
##  7 1958 Q1      -0.144         0.905         0.0281           0.0440
##  8 1958 Q2       0.0228        1.02          0.0126           0.0130
##  9 1958 Q3       0.0753        1.05         -0.0577          -0.0887
## 10 1958 Q4       0.0462        1.03          0.0261           0.0262
## # ℹ 219 more rows

The values in the seasonal-component columns differ across decomposition methods, reflecting the different procedures used to extract them. The `seasonal...2` and `seasonal...3` columns repeat the same values every year, showing that classical decomposition assumes a seasonal pattern that is constant across years. This is not the case for STL decomposition, where the seasonal component for the same quarter can change from year to year.

4. Explore different models of Exponential Smoothing (ETS), comment your results and select the model that you consider the best in fitting the series. Argument your model’s choice and further justify it by performing a residuals analysis and validating the model’s assumptions using the required statistical testing procedures.

# Fit four Holt-Winters (seasonal ETS) variants for comparison:
# additive vs multiplicative error/seasonality, each with and without
# a damped trend. Column order matters: later code indexes fit[,1]..fit[,4].
fit <- qcement_ts |> 
  model(
    H_W_additive = ETS(value ~ error("A") + trend("A") +
                         season("A") ),
    H_W_multiplicative = ETS(value ~ error("M") + trend("A") +
                               season("M") ),
    H_W_additive_damped = ETS(value ~ error("A") + trend("Ad") +
                             season("A")),
    H_W_multiplicative_damped = ETS(value~ error("M") + trend("Ad") +
                                   season("M"))
    )

The reason for choosing Holt-Winters’ method is the data exhibit trend and seasonal components .

components(fit[,1]) |>    # Holt-Winters additive
  autoplot()

components(fit[,2]) |>   # Holt-Winters multiplicative
  autoplot() 

components(fit[,3]) |>    # Holt-Winters additive damped
  autoplot()

components(fit[,4]) |>   # Holt-Winters multiplicative damped
  autoplot() 

components(fit) |>   # all four models in one plot (comment fixed: was mislabelled "multiplicative damped")
  autoplot() 

Each Holt-Winters model fitted above decomposes the series into five components:

  1. The value remains constant in every method as it represents the actual measured value.

  2. The level or trend indicates an increasing pattern due to high demand, and there are some damps in all methods applied with slightly different because of the different approaches used (additive or multiplicative smoothing).

  3. The slope reflects the fluctuation or changing the direction in the trend, and multiplicative methods can better capture this change compared to other methods.

  4. Both multiplicative and multiplicative damped methods exhibit a nice seasonal pattern.

  5. The remainder represents the difference between the observed values and the fitted values, and the multiplicative methods exhibit more randomness in the residuals compared to the additive method.

report(fit[,1])         # Holt-Winters additive report
## Series: value 
## Model: ETS(A,A,A) 
##   Smoothing parameters:
##     alpha = 0.6281542 
##     beta  = 0.008504617 
##     gamma = 0.2007587 
## 
##   Initial states:
##       l[0]       b[0]        s[0]      s[-1]     s[-2]       s[-3]
##  0.5159829 0.01504697 -0.01162037 0.02647345 0.0295095 -0.04436257
## 
##   sigma^2:  0.0074
## 
##      AIC     AICc      BIC 
## 135.3290 136.1361 166.3883
report(fit[,2])         # Holt-Winters multiplicative report
## Series: value 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.7504791 
##     beta  = 0.002970008 
##     gamma = 0.0001000013 
## 
##   Initial states:
##       l[0]        b[0]     s[0]    s[-1]   s[-2]    s[-3]
##  0.5042987 0.008074177 1.029664 1.048807 1.01635 0.905178
## 
##   sigma^2:  0.0022
## 
##       AIC      AICc       BIC 
##  6.783846  7.591021 37.843192
report(fit[,3])         # Holt-Winters additive damped report
## Series: value 
## Model: ETS(A,Ad,A) 
##   Smoothing parameters:
##     alpha = 0.6378078 
##     beta  = 0.0001000012 
##     gamma = 0.2031197 
##     phi   = 0.9004992 
## 
##   Initial states:
##      l[0]       b[0]        s[0]      s[-1]      s[-2]       s[-3]
##  0.488776 0.02524082 -0.01365153 0.03338637 0.02328536 -0.04302021
## 
##   sigma^2:  0.0074
## 
##      AIC     AICc      BIC 
## 138.6177 139.6087 173.1281
report(fit[,4])         # Holt-Winters multiplicative damped report
## Series: value 
## Model: ETS(M,Ad,M) 
##   Smoothing parameters:
##     alpha = 0.6845085 
##     beta  = 0.01735589 
##     gamma = 0.0001000002 
##     phi   = 0.9445062 
## 
##   Initial states:
##       l[0]       b[0]     s[0]    s[-1]    s[-2]     s[-3]
##  0.4884785 0.02348091 1.030108 1.048898 1.015958 0.9050364
## 
##   sigma^2:  0.0024
## 
##      AIC     AICc      BIC 
## 17.77544 18.76643 52.28582
accuracy(fit)            #accuracy measures for all Holt-Winters method applied 
## # A tibble: 4 × 10
##   .model         .type       ME   RMSE    MAE     MPE  MAPE  MASE RMSSE     ACF1
##   <chr>          <chr>    <dbl>  <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 H_W_additive   Trai… -2.65e-3 0.0843 0.0597 -0.488   3.87 0.585 0.630  0.0196 
## 2 H_W_multiplic… Trai…  6.24e-4 0.0783 0.0560 -0.0878  3.61 0.549 0.585 -0.0363 
## 3 H_W_additive_… Trai…  1.12e-2 0.0845 0.0606  0.595   3.90 0.594 0.631  0.00482
## 4 H_W_multiplic… Trai…  7.42e-3 0.0792 0.0567  0.328   3.66 0.555 0.592  0.0313

Thus the best model fitting the series is Holt-Winters multiplicative .

Residuals analysis

# Holt-Winters additive: residual analysis
# Extract the remainder (residual) component as a plain data frame
residuals_addi <- as.data.frame(components(fit[,1])$remainder)
colnames(residuals_addi)[1] <- "residual"

# Histogram of the residuals with a kernel density (red) and a fitted normal
# density (dashed black) for a visual normality check.
# NOTE(review): `..density..` is the legacy ggplot2 spelling; on
# ggplot2 >= 3.4 prefer after_stat(density) -- kept as-is for compatibility.
ggplot(data = residuals_addi, aes(x = residual)) +
  geom_histogram(aes(y = ..density..), color = "black", binwidth = 0.02, fill = "lightblue") +
  geom_density(color = "red", fill = "lightblue", alpha = 0.5) +
  stat_function(fun = dnorm,
                args = list(mean = mean(residuals_addi$residual),
                            sd = sd(residuals_addi$residual)),
                color = "black", linetype = "dashed") +
  labs(x = "Residuals", y = "Density", title = "Histogram and Density Plot of Residuals H.W (A,A,A)")

# Normal QQ plot of the residuals
ggplot(residuals_addi, aes(sample = residual)) + 
  geom_qq() +
  stat_qq_line(color = "red") +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles",  # fixed typo: "Quantlies"
       title = "Normal QQ Plot of Residuals H.W (A,A,A)")

# Shapiro-Wilk test of normality (H0: residuals are normally distributed)
shapiro.test(components(fit[,1])$remainder)
## 
##  Shapiro-Wilk normality test
## 
## data:  components(fit[, 1])$remainder
## W = 0.96818, p-value = 4.413e-05
# Plot the residuals against observation order
plot(residuals_addi$residual, main="Residuals of H.W (A,A,A)", ylab="Residuals", xlab="Time")

# Same residuals drawn as a connected time series
# NOTE(review): this duplicates the previous plot with lines instead of points
plot.ts(residuals_addi$residual, main="Residuals of H.W (A,A,A)", ylab="Residuals", xlab="Time")

# One-sample t-test (H0: the mean of the residuals is zero)
t.test(residuals_addi$residual)
## 
##  One Sample t-test
## 
## data:  residuals_addi$residual
## t = -0.47857, df = 232, p-value = 0.6327
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.013541437  0.008248679
## sample estimates:
##    mean of x 
## -0.002646379
# Holt-Winters multiplicative: residual analysis
residuals_multi <- as.data.frame(components(fit[,2])$remainder)
colnames(residuals_multi)[1] <- "residual"

# Histogram of the residuals with a kernel density overlay
ggplot(data = residuals_multi, aes(x = residual)) +
  geom_histogram(aes(y = ..density..), binwidth = 0.02, color = "black", fill = "lightblue") +
  geom_density(color = "red", fill = "lightblue", alpha = 0.7) +
  theme_classic() +
  labs(title = "Histogram of Residuals H.W(M,A,M)", x = "Residuals",
       y = "Density")  # fixed: the y axis shows density, not frequency

# Normal QQ plot of the residuals
ggplot(residuals_multi, aes(sample = residual)) + 
  geom_qq() +
  stat_qq_line(color = "red") +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles",  # fixed typo: "Quantlies"
       title = "Normal QQ Plot of Residuals H.W(M,A,M)")

# Shapiro-Wilk test of normality (H0: residuals are normally distributed)
shapiro.test(components(fit[,2])$remainder)
## 
##  Shapiro-Wilk normality test
## 
## data:  components(fit[, 2])$remainder
## W = 0.99589, p-value = 0.7955
# plot the residuals
plot(residuals_multi$residual, main="Residuals of H.W(M,A,M)", ylab="Residuals", xlab="Time")

plot.ts(residuals_multi$residual, main="Residuals of H.W(M,A,M)", ylab="Residuals", xlab="Time")

# t-test for the average
t.test(residuals_multi$residual)
## 
##  One Sample t-test
## 
## data:  residuals_multi$residual
## t = 0.42295, df = 232, p-value = 0.6727
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.004719568  0.007299746
## sample estimates:
##   mean of x 
## 0.001290089
# Holt-Winters additive damped: residual analysis
residuals_addi_damped <- as.data.frame(components(fit[,3])$remainder)
colnames(residuals_addi_damped)[1] <- "residual"

# Histogram of the residuals with a kernel density overlay
ggplot(data = residuals_addi_damped, aes(x = residual)) +
  geom_histogram(aes(y = ..density..), binwidth = 0.02, color = "black", fill = "lightblue") +
  geom_density(color = "red", fill = "lightblue", alpha = 0.7) +
  labs(x = "Residuals", y = "Density",  # fixed: the y axis shows density, not frequency
       title = "Histogram of Residuals H.W(A,Ad,A)") +
  theme_classic()

# Normal QQ plot of the residuals
ggplot(residuals_addi_damped, aes(sample = residual)) + 
  geom_qq() +
  stat_qq_line(color = "red") +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles",  # fixed typo: "Quantlies"
       title = "Normal QQ Plot of Residuals H.W(A,Ad,A)")

# Shapiro-Wilk test of normality (H0: residuals are normally distributed)
shapiro.test(components(fit[,3])$remainder)
## 
##  Shapiro-Wilk normality test
## 
## data:  components(fit[, 3])$remainder
## W = 0.96805, p-value = 4.24e-05
# plot the residuals
plot(residuals_addi_damped$residual, main="Residuals of H.W(A,Ad,A)", ylab="Residuals", xlab="Time")

plot.ts(residuals_addi_damped$residual, main="Residuals of H.W(A,Ad,A)", ylab="Residuals", xlab="Time")

# t-test for the average
t.test(residuals_addi_damped$residual)
## 
##  One Sample t-test
## 
## data:  residuals_addi_damped$residual
## t = 2.0298, df = 232, p-value = 0.04352
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.0003274365 0.0219971141
## sample estimates:
##  mean of x 
## 0.01116228
# Holt-Winters multiplicative damped: residual analysis
residuals_multi_damped <- as.data.frame(components(fit[,4])$remainder)
colnames(residuals_multi_damped)[1] <- "residual"

# Histogram of the residuals with a kernel density overlay
ggplot(data = residuals_multi_damped, aes(x = residual)) +
  geom_histogram(aes(y = ..density..), binwidth = 0.02, color = "black", fill = "lightblue") +
  geom_density(color = "red", fill = "lightblue", alpha = 0.7) +
  labs(x = "Residuals", y = "Density",  # fixed: the y axis shows density, not frequency
       title = "Histogram of Residuals H.W(M,Ad,M)") +
  theme_classic()

# Normal QQ plot of the residuals
ggplot(residuals_multi_damped, aes(sample = residual)) + 
  geom_qq() +
  stat_qq_line(color = "red") +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles",  # fixed typo: "Quantlies"
       title = "Normal QQ Plot of Residuals H.W(M,Ad,M)")

# Shapiro-Wilk test of normality (H0: residuals are normally distributed)
shapiro.test(components(fit[,4])$remainder)
## 
##  Shapiro-Wilk normality test
## 
## data:  components(fit[, 4])$remainder
## W = 0.99594, p-value = 0.8025
# plot the residuals
plot(residuals_multi_damped$residual, main="Residuals of H.W(M,Ad,M)", ylab="Residuals", xlab="Time")

plot.ts(residuals_multi_damped$residual, main="Residuals of H.W(M,Ad,M)", ylab="Residuals", xlab="Time")

# t-test for the average
t.test(residuals_multi_damped$residual)
## 
##  One Sample t-test
## 
## data:  residuals_multi_damped$residual
## t = 1.7827, df = 232, p-value = 0.07594
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.0005819034  0.0116444058
## sample estimates:
##   mean of x 
## 0.005531251

Comparing residuals :

Both additives methods :

  • Histogram of residuals shows that the residual in both are not normally distributed

  • in normal QQ plot, the residuals do not follow a straight line

  • For additive p-value is very small, which indicates that the null hypothesis of normality is rejected at a significance level of 0.05. This means that the residuals are not normally distributed. Same result for additive-damped .

  • Residuals are centred around zero; however, they exhibit some structure.

  • T-test show that the P-value is 0.6327 for additive therefore the result is not statistically significant and we cannot conclude that the residuals have a significant non-zero mean. On other hand, P-value is 0.04352 for additive_damped we reject the null hypothesis of zero mean .

  • The 95% confidence interval for the mean of the residuals is (-0.013541437 , 0.008248679) .Since the interval contains zero, this further supports the conclusion that the mean of the residuals is not significantly different from zero for additive. While for additive_damped (0.0003274365 , 0.0219971141)

Both multiplicative methods :

  • Histogram of residuals shows that the residuals are normally distributed

  • in normal QQ plot, the residuals follows a straight line

  • The null hypothesis for the Shapiro-Wilk test is that the data are normally distributed. The p-value of 0.7955 for multiplicative and 0.8025 multiplicative_damped indicate that we do not have enough evidence to reject the null hypothesis. Therefore, we can assume that the residuals are normally distributed.

  • Residuals are centred around zero and exhibit no visible structure, i.e. they appear randomly distributed.

  • The result of the test shows a t-value of 0.42295 and a p-value of 0.6727.Hence, we fail to reject the null hypothesis and conclude that there is no evidence that the mean of the residuals is significantly different from zero.

  • The 95% confidence interval for the mean of the residuals is (-0.004719568, 0.007299746) for multiplicative and (-0.0005819034 , 0.0116444058) for multiplicative_damped .Since the interval contains zero, this further supports the conclusion that the mean of the residuals is not significantly different from zero.

Based on this analysis we can confirm that the multiplicative methods are best for fitting our data. The best model for the series is the Holt-Winters multiplicative ETS(M,A,M), since it has the lowest RMSE and AICc.

5. Compute a h = 10 forecast and prediction intervals for different levels: 80%, 90%, 95% and 99%.

# Forecast 10 quarters ahead using the selected model,
# Holt-Winters multiplicative ETS(M,A,M)
holt_w_fc <- forecast(fit[, 2], h = 10)
holt_w_fc
## # A fable: 10 x 4 [1Q]
## # Key:     .model [1]
##    .model               index         value .mean
##    <chr>                <qtr>        <dist> <dbl>
##  1 H_W_multiplicative 2014 Q2 N(2.5, 0.014)  2.52
##  2 H_W_multiplicative 2014 Q3 N(2.6, 0.024)  2.61
##  3 H_W_multiplicative 2014 Q4 N(2.6, 0.032)  2.57
##  4 H_W_multiplicative 2015 Q1 N(2.3, 0.031)  2.27
##  5 H_W_multiplicative 2015 Q2 N(2.6, 0.048)  2.56
##  6 H_W_multiplicative 2015 Q3  N(2.6, 0.06)  2.65
##  7 H_W_multiplicative 2015 Q4 N(2.6, 0.067)  2.61
##  8 H_W_multiplicative 2016 Q1 N(2.3, 0.059)  2.30
##  9 H_W_multiplicative 2016 Q2 N(2.6, 0.083)  2.59
## 10 H_W_multiplicative 2016 Q3 N(2.7, 0.099)  2.68
# 80% prediction interval
# NOTE(review): `lower` actually stores the whole hilo table (point forecasts
# plus the 80% interval); only the lower bounds are printed below.
lower <- hilo(holt_w_fc, level = 80)
lower$`80%`$lower
##  [1] 2.368825 2.412978 2.344302 2.042601 2.275565 2.331746 2.274496 1.987662
##  [9] 2.219469 2.278487
hilo(holt_w_fc, level = 90)
## # A tsibble: 10 x 5 [1Q]
## # Key:       .model [1]
##    .model               index         value .mean                  `90%`
##    <chr>                <qtr>        <dist> <dbl>                 <hilo>
##  1 H_W_multiplicative 2014 Q2 N(2.5, 0.014)  2.52 [2.325495, 2.717845]90
##  2 H_W_multiplicative 2014 Q3 N(2.6, 0.024)  2.61 [2.356851, 2.865085]90
##  3 H_W_multiplicative 2014 Q4 N(2.6, 0.032)  2.57 [2.279780, 2.864032]90
##  4 H_W_multiplicative 2015 Q1 N(2.3, 0.031)  2.27 [1.978556, 2.558489]90
##  5 H_W_multiplicative 2015 Q2 N(2.6, 0.048)  2.56 [2.196167, 2.915115]90
##  6 H_W_multiplicative 2015 Q3  N(2.6, 0.06)  2.65 [2.242652, 3.049395]90
##  7 H_W_multiplicative 2015 Q4 N(2.6, 0.067)  2.61 [2.180428, 3.032216]90
##  8 H_W_multiplicative 2016 Q1 N(2.3, 0.059)  2.30 [1.899465, 2.698089]90
##  9 H_W_multiplicative 2016 Q2 N(2.6, 0.083)  2.59 [2.114539, 3.064686]90
## 10 H_W_multiplicative 2016 Q3 N(2.7, 0.099)  2.68 [2.164358, 3.197802]90
hilo(holt_w_fc, level = 95)
## # A tsibble: 10 x 5 [1Q]
## # Key:       .model [1]
##    .model               index         value .mean                  `95%`
##    <chr>                <qtr>        <dist> <dbl>                 <hilo>
##  1 H_W_multiplicative 2014 Q2 N(2.5, 0.014)  2.52 [2.287913, 2.755427]95
##  2 H_W_multiplicative 2014 Q3 N(2.6, 0.024)  2.61 [2.308169, 2.913767]95
##  3 H_W_multiplicative 2014 Q4 N(2.6, 0.032)  2.57 [2.223816, 2.919996]95
##  4 H_W_multiplicative 2015 Q1 N(2.3, 0.031)  2.27 [1.923006, 2.614039]95
##  5 H_W_multiplicative 2015 Q2 N(2.6, 0.048)  2.56 [2.127301, 2.983981]95
##  6 H_W_multiplicative 2015 Q3  N(2.6, 0.06)  2.65 [2.165377, 3.126670]95
##  7 H_W_multiplicative 2015 Q4 N(2.6, 0.067)  2.61 [2.098838, 3.113806]95
##  8 H_W_multiplicative 2016 Q1 N(2.3, 0.059)  2.30 [1.822968, 2.774587]95
##  9 H_W_multiplicative 2016 Q2 N(2.6, 0.083)  2.59 [2.023527, 3.155697]95
## 10 H_W_multiplicative 2016 Q3 N(2.7, 0.099)  2.68 [2.065368, 3.296792]95
hilo(holt_w_fc, level = 99)
## # A tsibble: 10 x 5 [1Q]
## # Key:       .model [1]
##    .model               index         value .mean                  `99%`
##    <chr>                <qtr>        <dist> <dbl>                 <hilo>
##  1 H_W_multiplicative 2014 Q2 N(2.5, 0.014)  2.52 [2.214461, 2.828879]99
##  2 H_W_multiplicative 2014 Q3 N(2.6, 0.024)  2.61 [2.213022, 3.008913]99
##  3 H_W_multiplicative 2014 Q4 N(2.6, 0.032)  2.57 [2.114438, 3.029373]99
##  4 H_W_multiplicative 2015 Q1 N(2.3, 0.031)  2.27 [1.814437, 2.722608]99
##  5 H_W_multiplicative 2015 Q2 N(2.6, 0.048)  2.56 [1.992707, 3.118575]99
##  6 H_W_multiplicative 2015 Q3  N(2.6, 0.06)  2.65 [2.014347, 3.277700]99
##  7 H_W_multiplicative 2015 Q4 N(2.6, 0.067)  2.61 [1.939375, 3.273269]99
##  8 H_W_multiplicative 2016 Q1 N(2.3, 0.059)  2.30 [1.673458, 2.924097]99
##  9 H_W_multiplicative 2016 Q2 N(2.6, 0.083)  2.59 [1.845650, 3.333574]99
## 10 H_W_multiplicative 2016 Q3 N(2.7, 0.099)  2.68 [1.871897, 3.490263]99
# Plot the forecast with all prediction-interval levels at once;
# the orange line overlays the one-step-ahead fitted values
holt_w_fc |> 
  autoplot(qcement_ts, level = c(80, 90, 95, 99)) + 
  geom_line(aes(y = .fitted), col = "#D55E00",
            data = augment(fit[,2])) +
  labs(x = "Time", y = "in millions of tonnes",  # fixed typo: "millios"
       title = "Forecasts quarterly cement production") +
  guides(colour = "none")

# One plot per prediction-interval level
holt_w_fc |> 
  autoplot(qcement_ts, level = 80) + 
  geom_line(aes(y = .fitted), col = "#D55E00",
            data = augment(fit[,2])) +
  labs(x = "Time", y = "in millions of tonnes",
       title = "Forecasts at level 80% for quarterly cement production") +
  guides(colour = "none")

holt_w_fc |> 
  autoplot(qcement_ts, level = 90) + 
  geom_line(aes(y = .fitted), col = "#D55E00",
            data = augment(fit[,2])) +
  labs(x = "Time", y = "in millions of tonnes",
       title = "Forecasts at level 90% for quarterly cement production") +
  guides(colour = "none")

holt_w_fc |> 
  autoplot(qcement_ts, level = 95) + 
  geom_line(aes(y = .fitted), col = "#D55E00",
            data = augment(fit[,2])) +
  labs(x = "Time", y = "in millions of tonnes",
       title = "Forecasts at level 95% for quarterly cement production") +
  guides(colour = "none")

holt_w_fc |> 
  autoplot(qcement_ts, level = 99) + 
  geom_line(aes(y = .fitted), col = "#D55E00",
            data = augment(fit[,2])) +
  labs(x = "Time", y = "in millions of tonnes",
       title = "Forecasts at level 99% for quarterly cement production") +
  guides(colour = "none")

The shaded regions in the plot represent the prediction intervals, and the color intensity reflects the associated probability. As the level increases, the intervals widen, because a higher confidence level must cover more of the forecast uncertainty. The black line represents the actual data, while the orange curve represents the one-step-ahead fitted values.

6. Compute an h = 10 forecast and prediction intervals for the same levels as in the previous point, this time using bootstrapping: (a) assuming normality of the residuals and (b) resampling from your fitted model’s residuals.

# Identify α, β, σ, the last l and b 

# report() prints the fitted ETS(M,A,M) summary (shown below) and returns the
# model object invisibly; unlist() flattens it so individual states and
# parameters can be pulled out by their flattened element names.
# NOTE(review): the numeric suffixes used below (l234, b234, s232,
# estimate1..3) are tied to this exact 233-observation fit and to the column
# order of the mable `fit` — confirm against the model's states/parameters
# tables if the data or the model specification ever changes.
a <- unlist(report(fit[,2]))
## Series: value 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.7504791 
##     beta  = 0.002970008 
##     gamma = 0.0001000013 
## 
##   Initial states:
##       l[0]        b[0]     s[0]    s[-1]   s[-2]    s[-3]
##  0.5042987 0.008074177 1.029664 1.048807 1.01635 0.905178
## 
##   sigma^2:  0.0022
## 
##       AIC      AICc       BIC 
##  6.783846  7.591021 37.843192
# last level state l (state at the final observation, used as forecast origin)

l.233 <- a$H_W_multiplicative.fit.states.l234       # 2.4727

# last slope state b
b.233 <- a$H_W_multiplicative.fit.states.b234          # 0.008356
# seasonal state for the first forecast step
# NOTE(review): s232 is the seasonal state one full cycle before the end —
# verify it is the index matching forecast origin + 1 quarter.
s.233 <- a$H_W_multiplicative.fit.states.s232

# alpha — level smoothing parameter
alpha <- a$H_W_multiplicative.fit.par.estimate1   # 0.7505

# beta — slope smoothing parameter
beta <- a$H_W_multiplicative.fit.par.estimate2     # 0.00297 

# gamma — seasonal smoothing parameter
gamma <- a$H_W_multiplicative.fit.par.estimate3   # 0.0001

# sigma — residual standard deviation (square root of the reported sigma^2)
sigma <- sqrt(a$H_W_multiplicative.fit.fit.sigma2) # 0.0473
set.seed(123) # fix the RNG so the bootstrap replicates are reproducible

h.step <- 10 # forecast horizon: 10 steps ahead
R <- 500     # number of bootstrap replicates

# Pre-allocate h.step x R matrices holding, per horizon (rows) and replicate
# (columns), the simulated level, slope, observation and seasonal state.
# Full argument names nrow/ncol are used instead of the partially-matched
# nr=/nc= of the original (partial argument matching is fragile).
l.mat <- matrix(nrow = h.step, ncol = R)
b.mat <- matrix(nrow = h.step, ncol = R)
y <- matrix(nrow = h.step, ncol = R)
s <- matrix(nrow = h.step, ncol = R)

# Bootstrap innovations: resample the model's own (relative) residuals.
# This is variant (b) of the exercise — no normality assumption.
resid <- components(fit[,2])$remainder
eps <- sample(size = R, x = resid, replace = TRUE)

# First step of the ETS(M,A,M) recursion, seeded with the last fitted states.
y[1,] <- (l.233 + b.233)*s.233*(1 + eps)
l.mat[1,] <- (l.233 + b.233)*(1 + alpha*eps)
b.mat[1,] <- b.233 + beta*(l.233 + b.233)*eps
s[1,] <- s.233*(1 + gamma*eps)

We use these recursions to generate each subsequent forecast step: Yt is the simulated observation, Lt the level, Bt the slope, and St the seasonal component.

## Propagate the ETS(M,A,M) recursion over the remaining horizons.
## Each horizon draws a fresh bootstrap sample of residuals, then updates
## observation, level, slope and seasonal state from the previous horizon.

for (step in 2:10) {
  shock <- sample(size = R, x = resid, replace = TRUE)
  prev <- l.mat[step - 1, ] + b.mat[step - 1, ]   # previous level + slope
  y[step, ] <- prev * s[step - 1, ] * (1 + shock)
  l.mat[step, ] <- prev * (1 + alpha * shock)
  b.mat[step, ] <- b.mat[step - 1, ] + beta * prev * shock
  s[step, ] <- s[step - 1, ] * (1 + gamma * shock)
}

# Observed series with every simulated forecast path overlaid.
plot(qcement_ts$value, type = "l", xlim = c(0, 250), ylim = c(0.5, 3.5),
     xlab = "Time", ylab = "Cement Production",
     main = "Simulated Forecast Scenarios")

# Legend distinguishing observed data from bootstrap paths.
legend("topleft", legend = c("Actual", "Simulated Scenarios"),
       lty = c(1, 1), col = c("black", "blue"), bty = "n", cex = 0.8)

# Draw all 500 replicate paths (columns of y) at horizons 234..243 in one
# call; lty = 1 matches the default solid lines of the per-column loop.
matlines(234:243, y, lty = 1, lwd = 0.5, col = "blue")

# Empirical prediction intervals from the bootstrap paths: for each horizon
# (row of y) take the pair of quantiles matching the desired coverage.
# na.rm = TRUE spelled out — never the reassignable shorthand T.
pi_80 <- apply(y, 1, quantile, probs = c(0.1, 0.9), na.rm = TRUE)
pi_90 <- apply(y, 1, quantile, probs = c(0.05, 0.95), na.rm = TRUE)
pi_95 <- apply(y, 1, quantile, probs = c(0.025, 0.975), na.rm = TRUE)
pi_99 <- apply(y, 1, quantile, probs = c(0.005, 0.995), na.rm = TRUE)

# Observed series, simulated paths, and all four interval bounds.
plot(qcement_ts$value, type = "l", xlim = c(0, 250), ylim = c(0.5, 3.5),
     xlab = "Time", ylab = "Cement Production",
     main = "Simulated Forecast Scenarios")

legend("topleft",
       legend = c("Actual", "Simulated Scenarios", "80% interval",
                  "90% interval", "95% interval", "99% interval"),
       lty = c(1, 1),
       col = c("black", "blue", "red", "green", "yellow", "orange"),
       bty = "n", cex = 0.8)

# Every bootstrap replicate at horizons 234..243.
for (j in seq_len(ncol(y))) {
  lines(y[, j] ~ c(234:243), col = "blue", lwd = 0.5)
}

# Lower and upper bound of each interval, one colour per level — a small
# loop replaces eight near-identical lines() calls.
interval_bounds <- list(list(pi_80, "red"),
                        list(pi_90, "green"),
                        list(pi_95, "yellow"),
                        list(pi_99, "orange"))
for (ib in interval_bounds) {
  lines(ib[[1]][1, ] ~ c(234:243), type = "l", lwd = 2, col = ib[[2]])
  lines(ib[[1]][2, ] ~ c(234:243), type = "l", lwd = 2, col = ib[[2]])
}

As the level increases, the width of the intervals also increases. Furthermore, the amount of uncertainty grows larger as we move further into the future. Consequently, the intervals become wider towards the end of the forecast period.

# Parametric (model-based) 80% bounds from hilo(); compute the hilo column
# once instead of calling hilo() twice.
hilo_int_80 <- hilo(holt_w_fc, level = 80)$`80%`
lower_hilo_80 <- hilo_int_80$lower
upper_hilo_80 <- hilo_int_80$upper

# Bootstrap 80% bounds: per-horizon empirical quantiles of the simulated
# paths (na.rm = TRUE spelled out instead of T).
pi_80 <- apply(y, 1, quantile, probs = c(0.1, 0.9), na.rm = TRUE)
lower_apply_80 <- pi_80[1, ]
upper_apply_80 <- pi_80[2, ]

# Both pairs of bounds, indexed by forecast horizon (seq_along, not 1:length).
intervals_80 <- data.frame(x = seq_along(lower_hilo_80),
                           lower_hilo = lower_hilo_80,
                           upper_hilo = upper_hilo_80,
                           lower_apply = lower_apply_80,
                           upper_apply = upper_apply_80)

# Overlay the two ribbons to compare parametric vs bootstrap interval widths.
ggplot(intervals_80, aes(x = x)) +
  geom_ribbon(aes(ymin = lower_hilo, ymax = upper_hilo, fill = "Parametric"),
              alpha = 0.3) +
  geom_ribbon(aes(ymin = lower_apply, ymax = upper_apply, fill = "Bootstrap"),
              alpha = 0.3) +
  scale_x_continuous(name = "Time", breaks = seq(0, 10, by = 1)) +
  scale_y_continuous(name = "Cement Production") +
  theme_bw() +
  labs(fill = "Method", title = "Comparison of Prediction Intervals",
       subtitle = "80%", caption = NULL)

# Parametric (model-based) 90% bounds from hilo(); compute the hilo column
# once instead of calling hilo() twice.
hilo_int_90 <- hilo(holt_w_fc, level = 90)$`90%`
lower_hilo_90 <- hilo_int_90$lower
upper_hilo_90 <- hilo_int_90$upper

# Bootstrap 90% bounds: per-horizon empirical quantiles of the simulated
# paths (na.rm = TRUE spelled out instead of T).
pi_90 <- apply(y, 1, quantile, probs = c(0.05, 0.95), na.rm = TRUE)
lower_apply_90 <- pi_90[1, ]
upper_apply_90 <- pi_90[2, ]

# Both pairs of bounds, indexed by forecast horizon (seq_along, not 1:length).
intervals_90 <- data.frame(x = seq_along(lower_hilo_90),
                           lower_hilo = lower_hilo_90,
                           upper_hilo = upper_hilo_90,
                           lower_apply = lower_apply_90,
                           upper_apply = upper_apply_90)

# Overlay the two ribbons to compare parametric vs bootstrap interval widths.
ggplot(intervals_90, aes(x = x)) +
  geom_ribbon(aes(ymin = lower_hilo, ymax = upper_hilo, fill = "Parametric"),
              alpha = 0.3) +
  geom_ribbon(aes(ymin = lower_apply, ymax = upper_apply, fill = "Bootstrap"),
              alpha = 0.3) +
  scale_x_continuous(name = "Time", breaks = seq(0, 10, by = 1)) +
  scale_y_continuous(name = "Cement Production") +
  theme_bw() +
  labs(fill = "Method", title = "Comparison of Prediction Intervals",
       subtitle = "90%", caption = NULL)

# Parametric (model-based) 95% bounds from hilo(); compute the hilo column
# once instead of calling hilo() twice.
hilo_int_95 <- hilo(holt_w_fc, level = 95)$`95%`
lower_hilo_95 <- hilo_int_95$lower
upper_hilo_95 <- hilo_int_95$upper

# Bootstrap 95% bounds: per-horizon empirical quantiles of the simulated
# paths (na.rm = TRUE spelled out instead of T).
pi_95 <- apply(y, 1, quantile, probs = c(0.025, 0.975), na.rm = TRUE)
lower_apply_95 <- pi_95[1, ]
upper_apply_95 <- pi_95[2, ]

# Both pairs of bounds, indexed by forecast horizon (seq_along, not 1:length).
intervals_95 <- data.frame(x = seq_along(lower_hilo_95),
                           lower_hilo = lower_hilo_95,
                           upper_hilo = upper_hilo_95,
                           lower_apply = lower_apply_95,
                           upper_apply = upper_apply_95)

# Overlay the two ribbons to compare parametric vs bootstrap interval widths.
ggplot(intervals_95, aes(x = x)) +
  geom_ribbon(aes(ymin = lower_hilo, ymax = upper_hilo, fill = "Parametric"),
              alpha = 0.3) +
  geom_ribbon(aes(ymin = lower_apply, ymax = upper_apply, fill = "Bootstrap"),
              alpha = 0.3) +
  scale_x_continuous(name = "Time", breaks = seq(0, 10, by = 1)) +
  scale_y_continuous(name = "Cement Production") +
  theme_bw() +
  labs(fill = "Method", title = "Comparison of Prediction Intervals",
       subtitle = "95%", caption = NULL)

# Parametric (model-based) 99% bounds from hilo(); compute the hilo column
# once instead of calling hilo() twice.
hilo_int_99 <- hilo(holt_w_fc, level = 99)$`99%`
lower_hilo_99 <- hilo_int_99$lower
upper_hilo_99 <- hilo_int_99$upper

# Bootstrap 99% bounds: per-horizon empirical quantiles of the simulated
# paths (na.rm = TRUE spelled out instead of T).
pi_99 <- apply(y, 1, quantile, probs = c(0.005, 0.995), na.rm = TRUE)
lower_apply_99 <- pi_99[1, ]
upper_apply_99 <- pi_99[2, ]

# Both pairs of bounds, indexed by forecast horizon (seq_along, not 1:length).
intervals_99 <- data.frame(x = seq_along(lower_hilo_99),
                           lower_hilo = lower_hilo_99,
                           upper_hilo = upper_hilo_99,
                           lower_apply = lower_apply_99,
                           upper_apply = upper_apply_99)

# Overlay the two ribbons to compare parametric vs bootstrap interval widths.
ggplot(intervals_99, aes(x = x)) +
  geom_ribbon(aes(ymin = lower_hilo, ymax = upper_hilo, fill = "Parametric"),
              alpha = 0.3) +
  geom_ribbon(aes(ymin = lower_apply, ymax = upper_apply, fill = "Bootstrap"),
              alpha = 0.3) +
  scale_x_continuous(name = "Time", breaks = seq(0, 10, by = 1)) +
  scale_y_continuous(name = "Cement Production") +
  theme_bw() +
  labs(fill = "Method", title = "Comparison of Prediction Intervals",
       subtitle = "99%", caption = NULL)